import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook


class Rank:
    """Scrape the ranking table at ``self.url`` and export it to an Excel file."""

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Header row written to the first line of the worksheet.
    HEADER_ROW = ("排名", "大学名称", "省份", "类型", "总分")

    # Number of leading <td> cells read from every table row.
    COLUMN_COUNT = 5

    def __init__(self):
        """Set the target URL and the browser-like request headers."""
        self.url = 'https://www.shanghairanking.cn/rankings/bcur/2023'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/80.0.3987.132 Safari/537.36'
        }

    def crawl(self, output_path='data.xlsx'):
        """Download the page, print every ranking row and save them to Excel.

        Args:
            output_path: Destination of the generated workbook. Defaults to
                ``'data.xlsx'`` in the current working directory.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            RuntimeError: If the page contains no ``<table class="rk-table">``.
        """
        response = requests.get(
            self.url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        html = response.content.decode("utf-8")
        soup = BeautifulSoup(html, 'lxml')

        # Locate the ranking table on the current page.
        table = soup.find('table', attrs={'class': 'rk-table'})
        if table is None:
            raise RuntimeError(
                f"No <table class='rk-table'> found at {self.url}; "
                "the page layout may have changed."
            )

        # Create a new workbook and use its default (active) worksheet.
        wb = Workbook()
        ws = wb.active
        ws.title = "数据表"

        # Write the header row (first row of a fresh worksheet).
        ws.append(list(self.HEADER_ROW))

        # The first <tr> is skipped, as in the original implementation
        # (it is expected to be the table's header row).
        for row in table.find_all('tr')[1:]:
            tds = row.find_all('td')
            # Ignore rows that do not carry all expected cells
            # (e.g. separator or malformed rows) instead of crashing.
            if len(tds) < self.COLUMN_COUNT:
                continue

            rank, school_name, province_city, school_type, total_score = (
                td.text.strip() for td in tds[:self.COLUMN_COUNT]
            )

            # Write the data row.
            ws.append([rank, school_name, province_city, school_type, total_score])

            # Echo the extracted data to stdout.
            print(f'{rank} {school_name} {province_city} {school_type} {total_score}')

        # Persist the collected data to the Excel file.
        wb.save(output_path)

if __name__ == '__main__':
    # Script entry point: build the crawler and run a single crawl.
    Rank().crawl()